# Dr. M. Baron, Statistical Machine Learning class, STAT-427/627

# GEOMETRY of LDA and QDA

# Load the Field Goal data directly from the URL
# NOTE(review): no import statements are visible in this file; `pd`, `np`,
# `plt`, `LDA` and `QDA` must already be in scope before this point.
url = "http://fs2.american.edu/~baron/627/R/Field%20goals.txt"
# sep=r"\s+" splits on runs of whitespace; it replaces the deprecated
# delim_whitespace=True flag.
FG = pd.read_csv(url, sep=r"\s+", header=None)

# Assign the columns (header=None, so they are labelled 0, 1, 2)
distance = FG[0]  # V1
made = FG[1]      # V2
week = FG[2]      # V3

# head is a method and has to be called; the bare attribute `FG.head` only
# yields the bound-method repr. print() makes the preview visible when the
# file is run as a script as well.
print(FG.head())

# Expected output (the full frame has 948 rows x 3 columns):
#     0  1  2
# 0  30  1  1
# 1  41  1  1
# 2  50  1  1
# 3  22  1  1
# 4  33  1  1

# Plotting: week vs. distance, colored by whether the goal was made (green for made, red for missed)
# The colormap rescales the values passed as `c`, so adding a constant offset
# has no effect on the colors. vmin/vmax pin 0 to the red end and 1 to the
# green end of RdYlGn regardless of which values are present.
plt.scatter(week, distance, c=made, cmap='RdYlGn', vmin=0, vmax=1)
plt.xlabel('Week')
plt.ylabel('Distance')
plt.title('Field Goal Attempts (Red: Missed, Green: Made)')
plt.show()

# Apply LDA to predict success of a field goal attempt
predictors = FG[[0, 2]]  # Use 'distance' and 'week' as predictors (V1, V3)
lda = LDA()
lda.fit(predictors, made)

# Predict on the same observations the model was fitted on. No
# cross-validation is performed here, so the rate printed below is the
# in-sample (training) classification rate.
y_pred_lda = lda.predict(predictors)
accuracy_lda = np.mean(y_pred_lda == made)
print(f"LDA Classification Rate: {accuracy_lda * 100:.2f}%")

# Output: LDA Classification Rate: 80.91%

# Plotting the LDA result: each attempt is colored by its predicted class
# (red: predicted missed, green: predicted made).
# The predicted labels are passed directly. pd.factorize numbers the classes
# in order of first appearance, so it can swap red and green depending on
# which class happens to be predicted first. vmin/vmax keep 0 -> red and
# 1 -> green even if only one class is predicted.
plt.scatter(week, distance, c=y_pred_lda, cmap='RdYlGn', vmin=0, vmax=1)
plt.xlabel('Week')
plt.ylabel('Distance')
plt.title('LDA Decision Boundary for Field Goals')
plt.show()

# Apply QDA to predict success of a field goal attempt
predictors = FG[[0, 2]]  # Use 'distance' and 'week' as predictors (V1, V3)
qda = QDA()
qda.fit(predictors, made)

# Predict on the same observations the model was fitted on. No
# cross-validation is performed here, so this is the in-sample (training)
# classification rate, reported the same way as for LDA.
y_pred_qda = qda.predict(predictors)
accuracy_qda = np.mean(y_pred_qda == made)
print(f"QDA Classification Rate: {accuracy_qda * 100:.2f}%")

# Plotting the QDA result: each attempt is colored by its predicted class
# (red: predicted missed, green: predicted made).
# The predicted labels are passed directly. pd.factorize numbers the classes
# in order of first appearance, so it can swap red and green depending on
# which class happens to be predicted first. vmin/vmax keep 0 -> red and
# 1 -> green even if only one class is predicted.
plt.scatter(week, distance, c=y_pred_qda, cmap='RdYlGn', vmin=0, vmax=1)
plt.xlabel('Week')
plt.ylabel('Distance')
plt.title('QDA Decision Boundary for Field Goals')
plt.show()

# A curvy boundary (QDA) is expected as opposed to the linear boundary (LDA)